from bokeh.plotting import figure, show
from bokeh.io import output_notebook
from bokeh.models import ColumnDataSource, LabelSet
from bokeh.models.formatters import NumeralTickFormatter
import pandas as pd
output_notebook()
<div class="bk-root">
<a href="https://bokeh.pydata.org" target="_blank" class="bk-logo bk-logo-small bk-logo-notebook"></a>
<span id="0aa7ba42-8fd8-4e98-a271-d0a17bbf6005">Loading BokehJS ...</span>
</div>
# Create the initial dataframe
index = ['sales','returns','credit fees','rebates','late charges','shipping']
data = {'amount': [350000,-30000,-7500,-25000,95000,-7000]}
df = pd.DataFrame(data=data,index=index)
# Determine the total net value by adding the start and all additional transactions
net = df['amount'].sum()
df
amount | |
---|---|
sales | 350000 |
returns | -30000 |
credit fees | -7500 |
rebates | -25000 |
late charges | 95000 |
shipping | -7000 |
# Create additional columns that we will use to build the waterfall
df['running_total'] = df['amount'].cumsum()
df['y_start'] = df['running_total'] - df['amount']
# Where do we want to place the label
df['label_pos'] = df['running_total']
df
amount | running_total | y_start | label_pos | |
---|---|---|---|---|
sales | 350000 | 350000 | 0 | 350000 |
returns | -30000 | 320000 | 350000 | 320000 |
credit fees | -7500 | 312500 | 320000 | 312500 |
rebates | -25000 | 287500 | 312500 | 287500 |
late charges | 95000 | 382500 | 287500 | 382500 |
shipping | -7000 | 375500 | 382500 | 375500 |
# We need to have a net column at the end with the totals and a full bar
df_net = pd.DataFrame.from_records([(net, net, 0, net)],
columns=['amount', 'running_total', 'y_start', 'label_pos'],
index=["net"])
df = df.append(df_net)
df
amount | running_total | y_start | label_pos | |
---|---|---|---|---|
sales | 350000 | 350000 | 0 | 350000 |
returns | -30000 | 320000 | 350000 | 320000 |
credit fees | -7500 | 312500 | 320000 | 312500 |
rebates | -25000 | 287500 | 312500 | 287500 |
late charges | 95000 | 382500 | 287500 | 382500 |
shipping | -7000 | 375500 | 382500 | 375500 |
net | 375500 | 375500 | 0 | 375500 |
# We want to color the positive values gray and the negative red
df['color'] = 'grey'
df.loc[df.amount < 0, 'color'] = 'red'
# The 10000 factor is used to make the text positioned correctly.
# You will need to modify if the values are significantly different
df.loc[df.amount < 0, 'label_pos'] = df.label_pos - 10000
df["bar_label"] = df["amount"].map('{:,.0f}'.format)
df
amount | running_total | y_start | label_pos | color | bar_label | |
---|---|---|---|---|---|---|
sales | 350000 | 350000 | 0 | 350000 | grey | 350,000 |
returns | -30000 | 320000 | 350000 | 310000 | red | -30,000 |
credit fees | -7500 | 312500 | 320000 | 302500 | red | -7,500 |
rebates | -25000 | 287500 | 312500 | 277500 | red | -25,000 |
late charges | 95000 | 382500 | 287500 | 382500 | grey | 95,000 |
shipping | -7000 | 375500 | 382500 | 365500 | red | -7,000 |
net | 375500 | 375500 | 0 | 375500 | grey | 375,500 |
# Build the Bokeh figure
# Limit the tools to only these three
TOOLS = "box_zoom,reset,save"
# Build the source data off the df dataframe
source = ColumnDataSource(df)
# Create the figure and assign range values that look good for the data set
p = figure(tools=TOOLS, x_range=list(df.index), y_range=(0, net+40000), plot_width=800, title = "Sales Waterfall")
p.grid.grid_line_alpha=0.3
# Add the segments
p.segment(x0='index', y0='y_start', x1="index", y1='running_total', source=source, color="color", line_width=55)
# Format the y-axis as dollars
p.yaxis[0].formatter = NumeralTickFormatter(format="($ 0 a)")
p.xaxis.axis_label = "Transactions"
# Add the labels
labels = LabelSet(x='index', y='label_pos', text='bar_label', text_font_size="8pt", level='glyph',
x_offset=-20, y_offset=0, source=source)
p.add_layout(labels)
show(p)
/Users/dereksnow/anaconda/envs/py36/lib/python3.6/site-packages/bokeh/core/json_encoder.py:80: FutureWarning: Conversion of the second argument of issubdtype from `float` to `np.floating` is deprecated. In future, it will be treated as `np.float64 == np.dtype(float).type`.
elif np.issubdtype(type(obj), np.float):